################################################
# Clean replication code for Tandeo 2022: 2SLS Robustness
# Created AJH
# Created 5/5/2022
# Last edited 5/5/2022
################################################
library(tidyverse)
library(aod)
library(stargazer)
load("chapter_5_rewarding_responsiveness/db_precinct.Rdata")
# source code to calculate conley standard errors
source("chapter_5_rewarding_responsiveness/conley-se-master/code/conley.R")

# create database of only election years
  elec <- db %>% filter(year %in% c("2006", "2009", "2012", "2015", "2018","2021"))


    # create variables for first year rationed
    first_year_rationed <- db %>%
      filter(tandeo==1) %>% 
      group_by(cve_secc) %>% 
      summarize(first_year_rationed = min(year)) %>% 
      ungroup()
    
    secciones <-db %>% 
      group_by(cve_secc) %>%
      slice(1) %>%
      select(cve_secc) %>% 
      ungroup()
    
    
    secciones <- left_join(secciones, first_year_rationed, by = "cve_secc")
    secciones$first_year_rationed <- ifelse(is.na(secciones$first_year_rationed), 2022, secciones$first_year_rationed)
    
    db <- left_join(db, secciones, by ="cve_secc")

    
    # 1: Unbalanced panel that drops observations after the first year of inclusion on rationing lists
    db_r1 <- db %>% filter(year <=first_year_rationed) %>% 
      filter(year %in% c("2006", "2009", "2012", "2015", "2018","2021"))
      
    # Model R1: 
    modelr1 <- felm(ivs_jd ~ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                     wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc | (tandeo ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std + alt_std_dryness_std + alt_std_lag1_dryness_std)|lat + lon, data = db_r1,keepCX=TRUE)
    
    # Conley SEs for first stage
    SE_modelr1_1s <-ConleySEs(reg = modelr1$stage1,
                             unit = "cve_secc", 
                             time = "year",
                             lat = "lat", lon = "lon",
                             dist_fn = "SH",
                             dist_cutoff = 60, 
                             lag_cutoff = 1,
                             cores = 1, 
                             verbose = FALSE) 
    vcov_modelr1_1s <- SE_modelr1_1s$Spatial
    f_modelr1_1s <- wald.test(b = coef(modelr1$stage1), Sigma = vcov_modelr1_1s, Terms =7:10)$result$chi2[1] %>% round(2)
    fvector_modelr1 <- c("First stage F on Excluded Instruments", f_modelr1_1s, "", "")
    
    ses_modelr1_1s  <- sapply(abs(SE_modelr1_1s$Spatial), sqrt) %>% round(3)
    # standard errors for 2sls
    SE_modelr1 <-ConleySEs(reg = modelr1,
                          unit = "cve_secc", 
                          time = "year",
                          lat = "lat", lon = "lon",
                          dist_fn = "SH",
                          dist_cutoff = 60, 
                          lag_cutoff = 1,
                          cores = 1, 
                          verbose = FALSE) 
    vcov_modelr1 <- SE_modelr1$Spatial
    ses_modelr1  <- sapply(abs(SE_modelr1$Spatial), sqrt) %>% round(3)
    
    # reduced form 
    rf_modelr1 <- felm(ivs_jd ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std +alt_std_dryness_std+alt_std_lag1_dryness_std+ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                        wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc |0|lat + lon, data = db_r1,keepCX=TRUE)
    # Conley SEs
    SE_rf_modelr1 <-ConleySEs(reg = rf_modelr1,
                             unit = "cve_secc", 
                             time = "year",
                             lat = "lat", lon = "lon",
                             dist_fn = "SH",
                             dist_cutoff = 60, 
                             lag_cutoff = 1,
                             cores = 1, 
                             verbose = FALSE) 
    vcov_rf_modelr1 <- SE_rf_modelr1$Spatial
    ses_rf_modelr1 <- sapply(abs(SE_rf_modelr1$Spatial), sqrt) %>% round(3)
    
    # reduced form 
    rf_modelr1 <- felm(ivs_jd ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std +alt_std_dryness_std+alt_std_lag1_dryness_std+ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                        wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc |0|lat + lon, data = db_r1,keepCX=TRUE)
    # Conley SEs
    SE_rf_modelr1 <-ConleySEs(reg = rf_modelr1,
                             unit = "cve_secc", 
                             time = "year",
                             lat = "lat", lon = "lon",
                             dist_fn = "SH",
                             dist_cutoff = 60, 
                             lag_cutoff = 1,
                             cores = 1, 
                             verbose = FALSE) 
    vcov_rf_modelr1 <- SE_rf_modelr1$Spatial
    ses_rf_modelr1 <- sapply(abs(SE_rf_modelr1$Spatial), sqrt) %>% round(3)
    
    stargazer(modelr1$stage1,  rf_modelr1,modelr1, 
              type= "text",
              add.lines= list(fvector_modelr1,c("Controls", "Yes", "Yes", "Yes")),
              se = list(ses_modelr1_1s,ses_rf_modelr1,ses_modelr1),
              keep.stat = c("n"),
              no.space = TRUE,
              dep.var.labels = c("Rationing","Incumbent Party Vote Share"),
              column.labels = c("First Stage","Reduced Form","2sls"),
              model.numbers          = FALSE,
              dep.var.caption = "",
              title = "Unbalanced Panel Where Observations are Dropped One Year After Treatment",
              notes = c("Year and precinct fixed effects", "Conley standard errors (10 km)", "Controls for green space, flood risk" ,"and wealth interacted with dryness" ),
              keep = c("tandeo", "d2z_std_dryness_std","d2z_std_lag1_dryness_std", "alt_std_dryness_std", "alt_std_lag1_dryness_std"),
              order = c("tandeo", "d2z_std_dryness_std","d2z_std_lag1_dryness_std", "alt_std_dryness_std", "alt_std_lag1_dryness_std"),
              covariate.labels =  c("Rationing (Instrumented)",
                                    "Dist to center x dryness (t)",
                                    "Dist to center x dryness (t-1)",
                                    "Altitude x dryness (t)",
                                    "Altitude x dryness (t-1)",
                                    "Wealth index x dryness (t)", 
                                    "Wealth index x dryness (t-1)", 
                                    "Flood risk x dryness (t)",
                                    "Flood risk x dryness (t-1)",
                                    "Green space x dryness (t)",
                                    "Green space x dryness (t-1)"),
              digits=3,
              label = "tab:modelr1",
              out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_Spring_2022/modelr1.tex")
  
    
    # 2: Balanced panel, in which once a neighborhood has appeared on the rationing list, it is considered treated in all subsequent years   
    db_r2 <- db %>% 
      filter(year %in% c("2006", "2009", "2012", "2015", "2018","2021"))
    db_r2$tandeo <- ifelse(db_r2$year >= db_r2$first_year_rationed, 1, db$tandeo)
    
    # Model r2: 
    modelr2 <- felm(ivs_jd ~ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                      wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc | (tandeo ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std + alt_std_dryness_std + alt_std_lag1_dryness_std)|lat + lon, data = db_r2,keepCX=TRUE)
    
    # Conley SEs for first stage
    SE_modelr2_1s <-ConleySEs(reg = modelr2$stage1,
                              unit = "cve_secc", 
                              time = "year",
                              lat = "lat", lon = "lon",
                              dist_fn = "SH",
                              dist_cutoff = 60, 
                              lag_cutoff = 1,
                              cores = 1, 
                              verbose = FALSE) 
    vcov_modelr2_1s <- SE_modelr2_1s$Spatial
    f_modelr2_1s <- wald.test(b = coef(modelr2$stage1), Sigma = vcov_modelr2_1s, Terms =7:10)$result$chi2[1] %>% round(2)
    fvector_modelr2 <- c("First stage F on Excluded Instruments", f_modelr2_1s, "", "")
    
    ses_modelr2_1s  <- sapply(abs(SE_modelr2_1s$Spatial), sqrt) %>% round(3)
    # standard errors for 2sls
    SE_modelr2 <-ConleySEs(reg = modelr2,
                           unit = "cve_secc", 
                           time = "year",
                           lat = "lat", lon = "lon",
                           dist_fn = "SH",
                           dist_cutoff = 60, 
                           lag_cutoff = 1,
                           cores = 1, 
                           verbose = FALSE) 
    vcov_modelr2 <- SE_modelr2$Spatial
    ses_modelr2  <- sapply(abs(SE_modelr2$Spatial), sqrt) %>% round(3)
  
    
    
    # reduced form 
    rf_modelr2 <- felm(ivs_jd ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std +alt_std_dryness_std+alt_std_lag1_dryness_std+ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                         wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc |0|lat + lon, data = elec,keepCX=TRUE)
    # Conley SEs
    SE_rf_modelr2 <-ConleySEs(reg = rf_modelr2,
                              unit = "cve_secc", 
                              time = "year",
                              lat = "lat", lon = "lon",
                              dist_fn = "SH",
                              dist_cutoff = 60, 
                              lag_cutoff = 1,
                              cores = 1, 
                              verbose = FALSE) 
    vcov_rf_modelr2 <- SE_rf_modelr2$Spatial
    ses_rf_modelr2 <- sapply(abs(SE_rf_modelr2$Spatial), sqrt) %>% round(3)
    
    # reduced form 
    rf_modelr2 <- felm(ivs_jd ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std +alt_std_dryness_std+alt_std_lag1_dryness_std+ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                         wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc |0|lat + lon, data = db_r2,keepCX=TRUE)
    # Conley SEs
    SE_rf_modelr2 <-ConleySEs(reg = rf_modelr2,
                              unit = "cve_secc", 
                              time = "year",
                              lat = "lat", lon = "lon",
                              dist_fn = "SH",
                              dist_cutoff = 60, 
                              lag_cutoff = 1,
                              cores = 1, 
                              verbose = FALSE) 
    vcov_rf_modelr2 <- SE_rf_modelr2$Spatial
    ses_rf_modelr2 <- sapply(abs(SE_rf_modelr2$Spatial), sqrt) %>% round(3)
    
    stargazer(modelr2$stage1,  rf_modelr2,modelr2, 
              type= "text",
              add.lines= list(fvector_modelr2,c("Controls", "Yes", "Yes", "Yes")),
              se = list(ses_modelr2_1s,ses_rf_modelr2,ses_modelr2),
              keep.stat = c("n"),
              no.space = TRUE,
              dep.var.labels = c("Rationing","Incumbent Party Vote Share"),
              column.labels = c("First Stage","Reduced Form","2sls"),
              model.numbers          = FALSE,
              dep.var.caption = "",
              title = "Balanced Panel Where Observations are Counted As Treated Every Year After First Inclusion on Rationing List",
              notes = c("Year and precinct fixed effects", "Conley standard errors (10 km)", "Controls for green space, flood risk" ,"and wealth interacted with dryness" ),
              keep = c("tandeo", "d2z_std_dryness_std","d2z_std_lag1_dryness_std", "alt_std_dryness_std", "alt_std_lag1_dryness_std"),
              order = c("tandeo", "d2z_std_dryness_std","d2z_std_lag1_dryness_std", "alt_std_dryness_std", "alt_std_lag1_dryness_std"),
              covariate.labels =  c("Rationing (Instrumented)",
                                    "Dist to center x dryness (t)",
                                    "Dist to center x dryness (t-1)",
                                    "Altitude x dryness (t)",
                                    "Altitude x dryness (t-1)",
                                    "Wealth index x dryness (t)", 
                                    "Wealth index x dryness (t-1)", 
                                    "Flood risk x dryness (t)",
                                    "Flood risk x dryness (t-1)",
                                    "Green space x dryness (t)",
                                    "Green space x dryness (t-1)"),
              digits=3,
              label = "tab:modelr2",
              out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_Spring_2022/modelr2.tex")
    
    rm(list=setdiff(ls(), c("db","elec", "Val_XeeXhC", "ConleySEs", "DistMat", "TimeDist", "XeeXhC", "XeeXhC_Lg")))
    
    # Simulating the results using different Conley distance cutoffs 
    # Basically, here we want to calculate the se's for the 2sls result using a variety of distance cutoffs, 
    # and plot their distribution as it compares to clustered standard errors. I use the main spec
    
    model3 <- felm(ivs_jd ~ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                     wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc | (tandeo ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std + alt_std_dryness_std + alt_std_lag1_dryness_std)|lat + lon, data = elec,keepCX=TRUE)
    
    get_se_by_dist_cutoff <- function(dist){
    # standard errors for 2sls
    SE_model3 <-ConleySEs(reg = model3,
                          unit = "cve_secc", 
                          time = "year",
                          lat = "lat", lon = "lon",
                          dist_fn = "SH",
                          dist_cutoff = dist, 
                          lag_cutoff = 1,
                          cores = 1, 
                          verbose = FALSE) 
    vcov_model3 <- SE_model3$Spatial
    ses_model3  <- sapply(abs(SE_model3$Spatial), sqrt) %>% round(3)
    return(ses_model3[7])
    }
    # create index of values for the distance cutoff
    distances <- seq(5,65, 5)
    conley_ses_by_dist <- lapply(distances, get_se_by_dist_cutoff) 
    
    # get clustered se
    model4 <- felm(ivs_jd ~ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                     wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc | (tandeo ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std + alt_std_dryness_std + alt_std_lag1_dryness_std)|cve_secc, data = elec)
    clustered_se <-model4$se[7] 

      p1 <-    ggplot()+ 
      geom_point(aes(x= unlist(distances), y = unlist(conley_ses_by_dist))) +
      theme_bw()+
      geom_point(aes(x= 25,y=clustered_se), color ="grey70")+ 
      geom_text(aes(x= 35,y=clustered_se), color ="grey50", label = "Neighborhood \n  clustered", size =3.5)+
      geom_point(aes(x= 60,y=unlist(conley_ses_by_dist)[12]), color ="grey50")+
      geom_text(aes(x= 60,y=0.035), color ="grey50", label = "Results \n in paper", size =3.5)+
      labs(x= "Distance (KM)", 
           y ="Std. Error on Rationing",
           title = "Variation in  Conley Standard Errors \n Using Different Distance Cutoffs", 
           note= "Plots the Conley Standard Error using Different distance cutoffs")
      ggsave(p1, file = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/1_Plots/5_Spring2022/graphics/conley_by_dist.jpg", width = 8)
      
    
    ###################################
    # Main spec for reference purposes
    ###################################
    
    
    # Model 3: 
    # Instrument: dist to center x dryness t-1; dist to center x dryness t; alt x dryness t; alt x dryness t-1
    # Outcome: ivs alcalde 
    # Standard errors: conley 
    # Controls
    model3 <- felm(ivs_jd ~ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                     wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc | (tandeo ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std + alt_std_dryness_std + alt_std_lag1_dryness_std)|lat + lon, data = elec,keepCX=TRUE)
    
    # Conley SEs for first stage
    SE_model3_1s <-ConleySEs(reg = model3$stage1,
                             unit = "cve_secc", 
                             time = "year",
                             lat = "lat", lon = "lon",
                             dist_fn = "SH",
                             dist_cutoff = 60, 
                             lag_cutoff = 1,
                             cores = 1, 
                             verbose = FALSE) 
    vcov_model3_1s <- SE_model3_1s$Spatial
    f_model3_1s <- wald.test(b = coef(model3$stage1), Sigma = vcov_model3_1s, Terms =7:10)$result$chi2[1] %>% round(2)
    fvector_model3 <- c("First stage F on Excluded Instruments", f_model3_1s, "", "")
    
    ses_model3_1s  <- sapply(abs(SE_model3_1s$Spatial), sqrt) %>% round(3)
    # standard errors for 2sls
    SE_model3 <-ConleySEs(reg = model3,
                          unit = "cve_secc", 
                          time = "year",
                          lat = "lat", lon = "lon",
                          dist_fn = "SH",
                          dist_cutoff = 60, 
                          lag_cutoff = 1,
                          cores = 1, 
                          verbose = FALSE) 
    vcov_model3 <- SE_model3$Spatial
    ses_model3  <- sapply(abs(SE_model3$Spatial), sqrt) %>% round(3)
    # reduced form 
    rf_model3 <- felm(ivs_jd ~ d2z_std_dryness_std + d2z_std_lag1_dryness_std +alt_std_dryness_std+alt_std_lag1_dryness_std+ wealth_index_dryness_std + flood_risk_dryness_std + green_space_std_dryness_std +
                        wealth_index_lag1_dryness_std + flood_risk_lag1_dryness_std + green_space_std_lag1_dryness_std|year + cve_secc |0|lat + lon, data = elec,keepCX=TRUE)
    # Conley SEs
    SE_rf_model3 <-ConleySEs(reg = rf_model3,
                             unit = "cve_secc", 
                             time = "year",
                             lat = "lat", lon = "lon",
                             dist_fn = "SH",
                             dist_cutoff = 60, 
                             lag_cutoff = 1,
                             cores = 1, 
                             verbose = FALSE) 
    vcov_rf_model3 <- SE_rf_model3$Spatial
    ses_rf_model3 <- sapply(abs(SE_rf_model3$Spatial), sqrt) %>% round(3)
    
    stargazer(model3$stage1,  rf_model3,model3, 
              type= "text",
              add.lines= list(fvector_model3,c("Controls", "Yes", "Yes", "Yes")),
              se = list(ses_model3_1s,ses_rf_model3,ses_model3),
              keep.stat = c("n"),
              no.space = TRUE,
              dep.var.labels = c("Rationing","Incumbent Party Vote Share"),
              column.labels = c("First Stage","Reduced Form","2sls"),
              model.numbers          = FALSE,
              dep.var.caption = "",
              title = "Support for Local Mayor: 2sls Results",
              notes = c("Year and precinct fixed effects", "Conley standard errors (10 km)", "Controls for green space, flood risk" ,"and wealth interacted with dryness" ),
              keep = c("tandeo", "d2z_std_dryness_std","d2z_std_lag1_dryness_std", "alt_std_dryness_std", "alt_std_lag1_dryness_std"),
              order = c("tandeo", "d2z_std_dryness_std","d2z_std_lag1_dryness_std", "alt_std_dryness_std", "alt_std_lag1_dryness_std"),
              covariate.labels =  c("Rationing (Instrumented)",
                                    "Dist to center x dryness (t)",
                                    "Dist to center x dryness (t-1)",
                                    "Altitude x dryness (t)",
                                    "Altitude x dryness (t-1)",
                                    "Wealth index x dryness (t)", 
                                    "Wealth index x dryness (t-1)", 
                                    "Flood risk x dryness (t)",
                                    "Flood risk x dryness (t-1)",
                                    "Green space x dryness (t)",
                                    "Green space x dryness (t-1)"),
              digits=3,
              label = "tab:model3",
              out = "/Users/alyssahuberts/Dropbox/2_mx_water/1_Tandeo/3_Output/3_Tables/4_Spring_2022/model3.tex")
    
    